perm filename QSORT[AP,SYS] blob
sn#000469 filedate 1972-09-25 generic text, type T, neo UTF8
COMMENT ⊗ VALID 00010 PAGES
RECORD PAGE DESCRIPTION
00001 00001
00005 00002 Definitions.
00006 00003 Storage allocations.
00008 00004 Start of main program. Read in all strings in input file.
00012 00005 Sort the strings using algorithm 5.2.2-Q in Knuth, "quicksort".
00015 00006 Continue sorting: Q4, Q5, Q6.
00017 00007 Continue sorting: Q7, Q8.
00019 00008 Continue sorting: Q8B, Q8C, Q9.
00022 00009 Write out sorted file: WRITEM.
00026 00010 Subroutines: GETCH, PUTWD, PUTCH, ERROR, DUP, PUTDUP, NXTDG.
00031 ENDMK
⊗;
;Definitions.
TITLE QSORT
AC0←←0
AC1←1
AC2←2
BEFORE←3
BPTR←←4
CHAR←←5
WD←←6
PREV←←7
PART1←4
PART2←5
PART3←6
PART4←7
PART5←10
PART6←11
PART7←12
AFTER←13
R←14
I←15
J←16
P←17
CR←←15
LF←←12
TAB←←11
FF←←14
TLEN←←10000
LSTLEN←←4000
PDLEN←←=100
MIN←←10 ;the minimum number of elements for using quicksort
DEFINE ERRMSG(MSG)
{PUSHJ P, [MOVEM AC1,SAVEAC
MOVEI AC1,[ASCIZ \MSG\]
JRST ERROR]}
;Storage allocations.
PDLIST: BLOCK PDLEN
IBUF: BLOCK 3 ;buffer header for reading in WORDS.TXT
OBUF: BLOCK 3 ;buffer header for writing out WORDS.SRT
DBUF: BLOCK 3 ;buffer header for writing out WORDS.DUP
INFILE: SIXBIT /WORDS/ ;LOOKUP block
SIXBIT /TXT/
BLOCK 2
OUTFIL: SIXBIT /WORDS/ ;ENTER block
SIXBIT /SRT/
BLOCK 2
DUPF: SIXBIT /WORDS/ ;ENTER block
SIXBIT /DUP/
BLOCK 2
;TEXT is a block for storing the characters of the strings being sorted
TEXT: OCT 400000000000 ;a key of -∞
BLOCK TLEN
;LST is a block for keeping the (somewhat) sorted list of strings.
; The left half of a word contains the negative of the length of the
; text for that string. The right half contains a ptr to its text.
LST: XWD -1,TEXT-1
BLOCK LSTLEN
LFT: LST+1 ;address of the leftmost element of the sublist under consideration
RGT: 0 ;address of the rightmost element of the sublist under consideration
LAST: 0
SAVEAC: 0 ;place for saving AC1 upon detection of an error
RSTART: 0 ;flag to prevent restarting of QSORT
COUNT: 0 ;count of the number of words going into output file
DIGITS: BLOCK 4 ;block for holding asciz digits of a number
;Start of main program. Read in all strings in input file.
QSORT: SKIPE RSTART
JRST [OUTSTR [ASCIZ /QSORT CANNOT BE RESTARTED/]
CALL [SIXBIT /EXIT/]]
SETOM RSTART
MOVE P,[INITP: IOWD PDLEN,PDLIST];initialize pdl ptr
INIT 1,0
SIXBIT /DSK/
IBUF
ERRMSG {INIT FAILED ON DSK}
LOOKUP 1,INFILE
ERRMSG {LOOKUP FAILED ON INPUT FILE}
MOVEI AC0,"@"
MOVE BPTR,[POINT 7,TEXT,34] ;init byte ptr for saving text of input words
MOVE WD,[XWD -LSTLEN,LST+1] ;init ptr to list of strings being sorted
OUTSTR [ASCIZ /READING.../]
FINDFF: PUSHJ P,GETCH
CAIE CHAR,FF ;skip directory page of WORDS.TXT, a TV file
JRST FINDFF
GETWD: HRRZM BPTR,(WD) ;save ptr to place for text of next word
GETWD1: MOVE PREV,BPTR ;save byte ptr for calculating length of word
GETLTR: PUSHJ P,GETCH
CAIG CHAR," " ;any char > space is considered part of input word
JRST NOTLTR
IDPB CHAR,BPTR ;save this char in TEXT
JRST GETLTR ;get next char
NOTLTR: CAIE CHAR,TAB ;tabs and spaces can separate parts of multiple
CAIN CHAR," " ; word keys.
JRST [CHK: PUSHJ P,GETCH ;find first non-tab, non-space char
CAIGE CHAR,"0"
JRST [CAIE CHAR,TAB ;consecutive spaces or tabs are
CAIN CHAR," " ; equivalent to one space
JRST CHK
CAIGE CHAR," " ;any char less than space ends the
JRST DELIM ; current word
JRST PUTAT] ;any other char is part of word
PUTAT: IDPB AC0,BPTR ;replace the tab or space with a "@"
IDPB CHAR,BPTR ;save the char after the tab or space
JRST GETLTR]
DELIM: CAIN CHAR,"⊗" ;"⊗" marks the beginning of a comment in
JRST [FINDCR: PUSHJ P,GETCH ; the input file. the comment
CAIE CHAR,CR ; continues up to the next
JRST FINDCR ; carriage return
JRST READLF]
CAIE CHAR,CR ;carriage return marks the end of a word
JRST GETLTR ;otherwise, the word continues (with some strange char)
READLF: PUSHJ P,GETCH ;read the lf that follows the cr
IDPB AC0,BPTR ;put an @ after the text of this word
TLNE BPTR,760000 ;if @ is at end of word, put another @
JRST [IBP BPTR ;otherwise, put a zero byte
JRST .+2]
FINWRD: IDPB AC0,BPTR ;put another @ to fill up the last word
TLNE BPTR,760000 ;now at low order byte?
JRST FINWRD ;no
SUB PREV,BPTR ;calculate the number of words in this word
HRLM PREV,(WD) ;store the length of this word in its LST entry
CAMLE BPTR,[POINT 7,TEXT+TLEN,34]
ERRMSG {TOO MANY WORDS. NO TEXT SPACE LEFT.}
AOBJN WD,GETWD
ERRMSG {TOO MANY WORDS. NO LIST SPACE LEFT.}
;Sort the strings using algorithm 5.2.2-Q in Knuth, "quicksort".
SORTEM: TLNE BPTR,760000
ERRMSG {EOF IN MIDDLE OF KEYWORD}
MOVE AC1,[377777777777] ;place a key of +∞ at the end
MOVEM AC1,1(BPTR) ; of the list of strings
MOVEI AC1,-1
HRLM AC1,(WD) ;store length of the +∞ key
HRRM BPTR,(WD) ;store text ptr for the +∞ key
SUBI WD,1 ;adjust the ptr to the last real key
HRRZM WD,RGT ; and sort up to this key
HRRZM WD,LAST
OUTSTR [ASCIZ /SORTING.../]
Q2: MOVE AC1,RGT ;if RGT-LFT < MIN then use straight
SUB AC1,LFT ; insertion sorting instead
CAIGE AC1,MIN ; of quicksort
JRST Q8 ;use straight insertion sorting
MOVE I,LFT ;I←LFT
MOVE J,RGT ;J←RGT
MOVE R,(I) ;R←R(I) (the Ith record being sorted)
MOVE PART1,1(R) ;load the current keyword string into
MOVE PART2,2(R) ; accumulators PART1 thru PART7
MOVE PART3,3(R)
MOVE PART4,4(R)
MOVE PART5,5(R)
MOVE PART6,6(R)
MOVE PART7,7(R)
Q3: HLRE AC1,R ;get negated length of current key into AC1
MOVE AC2,(J) ;put the Jth record into AC2
CAME PART1,1(AC2) ;compare the respective parts of record R
JRST [CAML PART1,1(AC2) ; and the Jth record
JRST Q4 ;Jth key ≤ key of record R
SOJA J,Q3] ;Jth key > key of record R
AOJGE AC1,Q4 ;if AC1=0 then Jth key = key of record R
CAME PART2,2(AC2)
JRST [CAML PART2,2(AC2)
JRST Q4
SOJA J,Q3]
AOJGE AC1,Q4
CAME PART3,3(AC2)
JRST [CAML PART3,3(AC2)
JRST Q4
SOJA J,Q3]
AOJGE AC1,Q4
CAME PART4,4(AC2)
JRST [CAML PART4,4(AC2)
JRST Q4
SOJA J,Q3]
AOJGE AC1,Q4
CAME PART5,5(AC2)
JRST [CAML PART5,5(AC2)
JRST Q4
SOJA J,Q3]
AOJGE AC1,Q4
CAME PART6,6(AC2)
JRST [CAML PART6,6(AC2)
JRST Q4
SOJA J,Q3]
AOJGE AC1,Q4
CAME PART7,7(AC2)
JRST [CAML PART7,7(AC2)
JRST Q4
SOJA J,Q3]
;Continue sorting: Q4, Q5, Q6.
Q4: CAMGE I,J
JRST .+3 ;I<J
MOVEM R,(I) ;I≥J. R←Ith record.
JRST Q7
MOVEM AC2,(I) ;I<J. Ith record ← Jth record
ADDI I,1 ;I←I+1
Q5: HLRE AC1,R ;get negated length of record R into AC1
MOVE AC2,(I) ;get Ith record into AC2
CAME PART1,1(AC2) ;compare Ith key with key of record R
JRST [CAMG PART1,1(AC2)
JRST Q6 ;key of record R ≤ Ith key
AOJA I,Q5] ;key of record R > Ith key
AOJGE AC1,Q6 ;AC1=0 means key of record R = Ith key
CAME PART2,2(AC2)
JRST [CAMG PART2,2(AC2)
JRST Q6
AOJA I,Q5]
AOJGE AC1,Q6
CAME PART3,3(AC2)
JRST [CAMG PART3,3(AC2)
JRST Q6
AOJA I,Q5]
AOJGE AC1,Q6
CAME PART4,4(AC2)
JRST [CAMG PART4,4(AC2)
JRST Q6
AOJA I,Q5]
AOJGE AC1,Q6
CAME PART5,5(AC2)
JRST [CAMG PART5,5(AC2)
JRST Q6
AOJA I,Q5]
AOJGE AC1,Q6
CAME PART6,6(AC2)
JRST [CAMG PART6,6(AC2)
JRST Q6
AOJA I,Q5]
AOJGE AC1,Q6
CAME PART7,7(AC2)
JRST [CAMG PART7,7(AC2)
JRST Q6
AOJA I,Q5]
Q6: CAMGE J,I
JRST .+3 ;I<J
MOVEM AC2,(J) ;I≥J. Jth record ← Ith record
SOJA J,Q3 ;J←J-1
MOVEM R,(J) ;I<J. Jth record ← record R
MOVEM J,I ;I←J
;Continue sorting: Q7, Q8.
;record R is now in its final place, dividing the list into two sublists.
;continue by sorting the smaller sublist next.
Q7: MOVE AC2,I ;AC2 ← I
ASH AC2,1 ;AC2 ← 2*I
SUB AC2,LFT ;AC2 ← 2*I - LFT
CAMLE AC2,RGT ;is 2*I - LFT ≤ RGT ? (ie I-LFT ≤ RGT -I)
JRST Q7A ;no
MOVE AC2,I ;yes
ADDI AC2,1
PUSH P,AC2 ;save (on the stack) the sublist from I+1 to RGT
PUSH P,RGT
SUBI AC2,2
MOVEM AC2,RGT ;RGT ← I-1
JRST Q2
Q7A: PUSH P,LFT ;save (on the stack) the sublist from LFT to I-1
MOVE AC2,I
SUBI AC2,1
PUSH P,AC2
ADDI AC2,2
MOVEM AC2,LFT ;LFT ← I+1
JRST Q2
;prepare to sort from LFT to RGT by straight insertion
Q8: AOS J,LFT ;J ← LFT + 1
Q8A: CAMLE J,RGT ;insert record J into the sorted list unless J > RGT
JRST Q9 ;insertion sort is finished
MOVE R,(J) ;record R ← Jth record
MOVE PART1,1(R) ;load the parts of the key of record R into ACs
MOVE PART2,2(R)
MOVE PART3,3(R)
MOVE PART4,4(R)
MOVE PART5,5(R)
MOVE PART6,6(R)
MOVE PART7,7(R)
MOVEI I,-1(J) ;I ← J-1
;Continue sorting: Q8B, Q8C, Q9.
;insertion sorting for small numbers of elements (continued).
Q8B: MOVE AC2,(I) ;put the Ith record into AC2
HLRE AC1,R ;get the length of the key of record R into AC1
CAME PART1,1(AC2) ;compare the Ith key with the key of record R
JRST [CAML PART1,1(AC2)
JRST Q8C ;key of record R ≥ Ith key
OVER: MOVE AC1,(I) ;key of record R < Ith key. move the Ith
MOVEM AC1,1(I) ; record over one to the right
SOJA I,Q8B] ;I ← I-1. get the new Ith record
AOJGE AC1,Q8C ;AC1=0 means key of record R = Ith key
CAME PART2,2(AC2)
JRST [CAML PART2,2(AC2)
JRST Q8C
JRST OVER]
AOJGE AC1,Q8C
CAME PART3,3(AC2)
JRST [CAML PART3,3(AC2)
JRST Q8C
JRST OVER]
AOJGE AC1,Q8C
CAME PART4,4(AC2)
JRST [CAML PART4,4(AC2)
JRST Q8C
JRST OVER]
AOJGE AC1,Q8C
CAME PART5,5(AC2)
JRST [CAML PART5,5(AC2)
JRST Q8C
JRST OVER]
AOJGE AC1,Q8C
CAME PART6,6(AC2)
JRST [CAML PART6,6(AC2)
JRST Q8C
JRST OVER]
AOJGE AC1,Q8C
CAME PART7,7(AC2)
JRST [CAML PART7,7(AC2)
JRST Q8C
JRST OVER]
Q8C: MOVEM R,1(I) ;found the place in the sorted list for record R
AOJA J,Q8A ;J ← J+1. get next key to be inserted
Q9: CAMN P,INITP ;is the stack of empty of sublists to be sorted?
JRST WRITEM ;yes. everything is sorted so write out the results
POP P,RGT ;no. pop a sublist off
POP P,LFT ; the stack and
JRST Q2 ; go sort it
;Write out sorted file: WRITEM.
WRITEM: INIT 2,0
SIXBIT /DSK/
XWD OBUF,0
ERRMSG {INIT FAILED ON DSK}
ENTER 2,OUTFIL
ERRMSG {ENTER FAILED ON OUTPUT FILE}
INIT 3,0
SIXBIT /DSK/
XWD DBUF,0
ERRMSG {INIT FAILED ON DSK}
ENTER 3,DUPF
ERRMSG {ENTER FAILED ON FILE FOR DUPLICATE WORDS}
OUTSTR [ASCIZ /
DUPLICATES: /]
MOVEI WD,LST+1 ;make WD point at first element of sorted list
MOVE AFTER,[JUMP TEXT-1(AC2)];init previous key to key of -∞
NEXTWD: HRRZ BPTR,(WD) ;set up byte ptr to text of current key
HRLI BPTR,700
MOVE BEFORE,AFTER ;save indirect ptr to text of previous key
HRR AFTER,BPTR ;set up indirect ptr to text of current key
HLLZ AC2,(WD) ;put negated length of current key in left of AC2
ADDI AC2,1 ;put displacement of 1 into right half of AC2
CMPR: MOVE PART7,@AFTER ;get one part of current key and compare
CAME PART7,@BEFORE ; it to corresponding part of old key
JRST NEXTCH ;the corresponding parts are not the same
AOBJN AC2,CMPR ;they are the same. get next part of each, if any.
JRST DUP ;all parts of the previous and current keys were samm
NEXTCH: ILDB CHAR,BPTR ;get a char of current key
CAIN CHAR,"@" ;is it a "@"?
JRST [ILDB AC1,BPTR ;yes. get immediately following char
CAIN AC1,"@" ;if it is "@", then first "@" ended the key
JRST ENDWD
JUMPE AC1,ENDWD ;if it is zero, then the "@" ended the key
PUSHJ P,PUTCH ;otherwise, output the "@" and
MOVE CHAR,AC1 ; the following char
JRST .+1]
PUSHJ P,PUTCH ;output the char to the file of sorted keys
JRST NEXTCH ;get the next char in the key, if any
ENDWD: MOVEI CHAR,CR ;output a CR and a LF after the key in
PUSHJ P,PUTCH ; the file of sorted keys
MOVEI CHAR,LF
PUSHJ P,PUTCH
AOS COUNT ;count the number of keys (not including duplicates)
FINWD: CAMGE WD,LAST ;have we gotten to the last of the sorted keys?
AOJA WD,NEXTWD ;no. go back and get the next one.
RELEAS 2, ;yes. close the output file
RELEAS 3, ;close the file of duplicate keywords
OUTSTR [ASCIZ /
/]
MOVE AC1,COUNT ;convert the number of keys to ascii
MOVE BPTR,[POINT 7,DIGITS]
PUSHJ P,NXTDG
SETZ AC2,
IDPB AC2,BPTR
OUTSTR DIGITS ;print out the number of keys (not including duplicates)
OUTSTR [ASCIZ / SORTED WORDS IN WORDS.SRT
DUPLICATE WORDS IN WORDS.DUP
/]
CALL [SIXBIT /EXIT/] ;bye bye
;Subroutines: GETCH, PUTWD, PUTCH, ERROR, DUP, PUTDUP, NXTDG.
;get a character from the input file.
GETCH: SOSG IBUF+2 ;decrement byte count
IN 1, ;buffer emptied. get another
JRST [ILDB CHAR,IBUF+1 ;load a character into CHAR
JUMPE CHAR,GETCH ;if the char is a null, get another char
POPJ P,]
STATO 1,20000 ;test for EOF
ERRMSG {UNKNOWN ERROR CONDITION CAME UP ON INPUT}
SUB P,[XWD 1,1] ;pop return address off the stack
JRST SORTEM ;go sort the keys that have been read in
;output a character to the file of sorted keys.
PUTCH: SOSG OBUF+2 ;decrement byte count
OUT 2, ;buffer filled. output it.
JRST [IDPB CHAR,OBUF+1 ;deposit a character into the output buffer
POPJ P,]
ERRMSG {UNKNOWN ERROR CONDITION CAME UP ON OUTPUT}
;print out an error message on the tty.
ERROR: OUTSTR [CRLFS: ASCIZ /
/]
OUTSTR (AC1)
OUTSTR CRLFS
MOVE AC1,SAVEAC
CALL 1,[SIXBIT /EXIT/]
;print out a duplicate keyword on the tty and write it into the file of duplicates.
DUP: ILDB CHAR,BPTR
CAIN CHAR,"@"
JRST FINWD ;this is a duplicate null word
OUTSTR (BPTR) ;type out the keyword
OUTCHR [" "]
PUSHJ P,PUTDUP
NXTDCH: ILDB CHAR,BPTR ;get a char of the keyword
CAIN CHAR,"@" ;is it a "@"?
JRST [ILDB AC1,BPTR ;yes. get the following char
CAIN AC1,"@" ;if it's "@", then first "@" ended the key
JRST ENDDWD
JUMPE AC1,ENDDWD ;if it's null, the "@" ended the keyword
PUSHJ P,PUTDUP ;otherwise, put the "@" into the dup buffer
MOVE CHAR,AC1 ;put the following char into the dup buffer
JRST .+1]
PUSHJ P,PUTDUP ;write out the char into the file of dup keywords
JRST NXTDCH ;get the next char in the keyword, if any
ENDDWD: MOVEI CHAR,CR ;put a CRLF after each keyword in the file
PUSHJ P,PUTDUP
MOVEI CHAR,LF
PUSHJ P,PUTDUP
JRST FINWD
;put a char of a duplicate keyword into the output buffer for the file of dup keys
PUTDUP: SOSG DBUF+2 ;decrement byte count
OUT 3, ;buffer filled. output it.
JRST [IDPB CHAR,DBUF+1 ;deposit the char into the buffer
POPJ P,]
ERRMSG {UNKNOWN ERROR OCCURRED ON OUTPUT OF DUPLICATE WORD}
;convert a number to ascii, depositing the ascii digits with the byte ptr BPTR
NXTDG: IDIVI AC1,=10 ;divide the number by =10 and
PUSH P,AC2 ; save the remainder
SKIPE AC1 ;if the quotient is zero, the conversion is done
PUSHJ P,NXTDG ;otherwise, calculate the next digit
POP P,AC1 ;get high order digits off stack first
ADDI AC1,60 ;convert current digit to ascii
IDPB AC1,BPTR ;deposit it in ascii string
POPJ P, ;get next digit, or return if all done
END QSORT